Importing the Data

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1     ✔ readr   1.3.1
## ✔ tibble  2.0.1     ✔ purrr   0.2.5
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ ggplot2 3.1.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the batting workbook and drop the Player2 / Player3 columns in one pass.
dat <- read_excel("ALL.xlsx") %>%
  select(-Player2, -Player3)

Create Variables we need

#1
# Derived rate statistics, each rounded to three decimals.
dat <- mutate(
  dat,
  RBIAB = round(RBI / AB, digits = 3),                           # runs batted in per at bat
  HRAB  = round(HR / AB, digits = 3),                            # home runs per at bat
  BABIP = round((H - HR) / (AB - SO - HR + SF), digits = 3)      # batting average on balls in play
)

# Data cleaning: NP is stored as character, so convert it to numeric before it
# is used in arithmetic. Entries that cannot be parsed become NA.
dat$NP <- as.numeric(dat$NP)
## Warning: NAs introduced by coercion

dat <- mutate(
  dat,
  NPPA = round(NP / PA, digits = 3),   # pitches per plate appearance
  NPAB = round(NP / AB, digits = 3),   # pitches per at bat
  SOAB = round(SO / AB, digits = 3)    # strikeouts per at bat
)

Visualize the Data

# Histogram of AVG over every row of dat, using 0.002-wide bins.
ggplot(dat, aes(x = AVG)) +
  geom_histogram(binwidth = .002, color = "Black", fill = "light blue") +
  labs(
    x = "Batting Average",
    y = "Count",
    title = "Histogram of Batting Average"
  )

# Histogram of OPS over every row of dat, using 0.005-wide bins.
ggplot(dat, aes(x = OPS)) +
  geom_histogram(binwidth = .005, color = "Black", fill = "light green") +
  labs(
    x = "On-Base + Slugging Percentage",
    y = "Count",
    title = "Histogram of OPS"
  )

# Correlation between slugging percentage and RBI per at bat.
correlationMatrix3 <- select(dat, SLG, RBIAB)
cor(correlationMatrix3)
##             SLG     RBIAB
## SLG   1.0000000 0.7968354
## RBIAB 0.7968354 1.0000000

# Build the plot annotation from the data rather than typing the numbers in by
# hand: the previous hard-coded label (.796 / .634) was truncated instead of
# rounded and would silently go stale if the data changed.
slg_rbiab_r <- cor(dat$SLG, dat$RBIAB)
slg_rbiab_label <- sprintf("R = %.3f\nR^2 = %.3f", slg_rbiab_r, slg_rbiab_r^2)

# Scatter of SLG against RBI per at bat with a linear fit and the R / R^2 label.
ggplot(dat, aes(x = RBIAB, y = SLG)) +
  geom_point(color = "coral2") +
  geom_smooth(method = "lm", formula = y ~ x) +
  xlab("RBI per At bat") +
  ylab("Slugging Percentage") +
  ggtitle("Relationship between Slugging Percentage and RBI's per At Bat") +
  annotate("text", x = .325, y = .55, label = slg_rbiab_label)

# Same comparison using raw RBI totals instead of the per-at-bat rate.
# Note that this reuses (overwrites) correlationMatrix3.
correlationMatrix3 <- dat %>%
  select(RBI, SLG)
cor(correlationMatrix3)
##           RBI       SLG
## RBI 1.0000000 0.3106309
## SLG 0.3106309 1.0000000

Does seeing more pitches in an at-bat increase the odds of hitting a home run? (PROBABLY WON'T USE THIS)

# Pitches per plate appearance (y) against home runs per at bat (x), with a
# straight-line fit. Rows where NPPA is NA are dropped by ggplot with a warning.
## R_SQUARED
ggplot(dat, aes(x = HRAB, y = NPPA)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x)
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

Which position has the best batting average?

# Attach each position's mean AVG to every row so the faceted histogram can
# draw one mean line per panel. AVG values equal to -9999 are replaced with NA
# and therefore excluded from the mean.
dat2 <- dat %>%
  select(Pos, AVG) %>%
  mutate(AVG = ifelse(AVG == -9999, NA, AVG)) %>%
  group_by(Pos) %>%
  mutate(MEANAVG = mean(AVG, na.rm = TRUE))  # dat2 stays grouped by Pos

# One histogram panel per position, with a vertical line at that position's mean.
ggplot(dat2, aes(x = AVG)) +
  geom_histogram(binwidth = .002, color = "Black", fill = "goldenrod2") +
  geom_vline(aes(xintercept = MEANAVG)) +
  facet_wrap(~Pos) +
  labs(
    x = "Batting Average",
    y = "Count",
    title = "Histogram of Batting Average by Position"
  )

# Collapse to one row per position: drop the per-row AVG, keep the distinct
# (Pos, MEANAVG) pairs, then round the mean to 3 digits.
dat3 <- dat2 %>%
  select(-AVG) %>%
  distinct()
dat3$MEANAVG <- round(dat3$MEANAVG, digits = 3)

head(dat3, 11)
## # A tibble: 10 x 2
## # Groups:   Pos [10]
##    Pos   MEANAVG
##    <chr>   <dbl>
##  1 1B      0.293
##  2 SS      0.284
##  3 2B      0.289
##  4 3B      0.285
##  5 CF      0.286
##  6 RF      0.284
##  7 LF      0.290
##  8 C       0.288
##  9 DH      0.275
## 10 OF      0.288
# WOW, this makes no sense: the lowest batting averages on the field belong to the DH and the right fielder.

Teams batting average?

# Attach each team's mean AVG to every row. AVG values equal to -9999 are
# replaced with NA and therefore excluded from the mean.
dat5 <- dat %>%
  select(Team, AVG) %>%
  mutate(AVG = ifelse(AVG == -9999, NA, AVG)) %>%
  group_by(Team) %>%
  mutate(MEANAVG = mean(AVG, na.rm = TRUE))  # dat5 stays grouped by Team

# One row per team, with the mean rounded to 3 digits.
dat6 <- dat5 %>%
  select(-AVG) %>%
  distinct()
dat6$MEANAVG <- round(dat6$MEANAVG, digits = 3)

head(dat6, 10)
## # A tibble: 10 x 2
## # Groups:   Team [10]
##    Team  MEANAVG
##    <chr>   <dbl>
##  1 LAD     0.288
##  2 CWS     0.288
##  3 MIN     0.285
##  4 NYM     0.290
##  5 CLE     0.287
##  6 WSH     0.296
##  7 STL     0.297
##  8 TEX     0.279
##  9 TB      0.282
## 10 CHC     0.290

Following Albert Pujols through his career

# Rows whose Player field contains "Pujols,A".
Albert <- dat %>%
  filter(grepl("Pujols,A", Player))

# Rate statistics by year: AVG (blue), SLG (red), HR per at bat (purple),
# RBI per at bat (orange), with a red reference line at 2010.
ggplot(Albert, aes(x = Year)) +
  geom_line(aes(y = AVG), color = "blue") +
  geom_line(aes(y = SLG), color = "red") +
  geom_line(aes(y = HRAB), color = "purple") +
  geom_line(aes(y = RBIAB), color = "Orange") +
  geom_vline(xintercept = 2010, color = "Red") +
  labs(
    x = "Year",
    y = "Averages",
    title = "Albert Pujols's Career mapped by Year"
  )

# Counting statistics by year: HR (blue), SO (red), BB (purple), RBI (orange),
# with a black reference line at 2010 and a text note placed at (2012.5, 130).
a <- ggplot(Albert, aes(x = Year)) +
  geom_line(aes(y = HR), color = "blue") +
  geom_line(aes(y = SO), color = "red") +
  geom_line(aes(y = BB), color = "purple") +
  geom_line(aes(y = RBI), color = "Orange") +
  labs(
    x = "Year",
    y = "Totals By Category",
    title = "Albert Pujols's Career mapped by Year"
  ) +
  geom_vline(xintercept = 2010, color = "Black") +
  annotate(
    "Text",
    x = 2012.5,
    y = 130,
    label = "Left ALL his fans \nand headed to the Angels. \nAll Stats Plummit"
  )

# Render the static plot as an interactive plotly widget.
ggplotly(a)
# Rows whose Player field contains "Trout,M", minus the first matching row.
# NOTE(review): the reason for dropping the first row is not visible here
# (presumably a partial season or a different player) - confirm.
Trout <- dat %>%
  filter(grepl("Trout,M", Player)) %>%
  slice(-1)

# Rate statistics by year: AVG (blue), BABIP (red), HR per at bat (purple),
# RBI per at bat (orange). The plot is stored in `b` but not printed here.
b <- ggplot(Trout, aes(x = Year)) +
  geom_line(aes(y = AVG), color = "blue") +
  geom_line(aes(y = BABIP), color = "red") +
  geom_line(aes(y = HRAB), color = "purple") +
  geom_line(aes(y = RBIAB), color = "Orange") +
  labs(
    x = "Year",
    y = "Averages",
    title = "Mike Trout's Career mapped by Year"
  )

# Counting statistics by year: HR (blue), SB (red), BB (purple), RBI (orange),
# with a black reference line at 2017.
ggplot(Trout, aes(x = Year)) +
  geom_line(aes(y = HR), color = "blue") +
  geom_line(aes(y = SB), color = "red") +
  geom_line(aes(y = BB), color = "purple") +
  geom_line(aes(y = RBI), color = "Orange") +
  geom_vline(xintercept = 2017, color = "Black") +
  labs(
    x = "Year",
    y = "Totals By Category",
    title = "Mike Trout's Career mapped by Year"
  )

Do Homerun hitters have higher Strike Out percentages?

# Correlation between home runs per at bat and strikeouts per at bat.
correlationMatrix2 <- select(dat, HRAB, SOAB)
cor(correlationMatrix2)
##           HRAB      SOAB
## HRAB 1.0000000 0.4033054
## SOAB 0.4033054 1.0000000

# Build the annotation from the data: the previous hard-coded label said
# R = .402 / R^2 = .162, which does not match the computed 0.4033 above.
hrab_soab_r <- cor(dat$HRAB, dat$SOAB)
hrab_soab_label <- sprintf("R = %.3f\nR^2 = %.3f", hrab_soab_r, hrab_soab_r^2)

# Scatter of strikeout rate against home-run rate with a linear fit.
ggplot(dat, aes(x = HRAB, y = SOAB)) +
  geom_smooth(method = "lm") +
  geom_point() +
  xlab("Home Runs Per at Bat") +
  ylab("Strike outs Per at Bat") +
  ggtitle("Homerun Hitters and Strike Out Percentages") +
  annotate("text", x = .1225, y = .370, label = hrab_soab_label)

Graph rivalry teams by runs scored.

# Rows whose Team field contains "BOS" / "NYY", stacked into one table.
# full_join() without `by` joins on every shared column (see the message below).
RedSox <- dat %>% filter(grepl("BOS", Team))
Yankees <- dat %>% filter(grepl("NYY", Team))
Rivals <- full_join(RedSox, Yankees)
## Joining, by = c("RK", "Player", "Year", "Team", "Pos", "G", "AB", "R", "H", "2B", "3B", "HR", "RBI", "BB", "SO", "SB", "CS", "AVG", "OBP", "SLG", "OPS", "IBB", "HBP", "SAC", "SF", "TB", "XBH", "GDP", "GO", "AO", "GO_AO", "NP", "PA", "RBIAB", "HRAB", "BABIP", "NPPA", "NPAB", "SOAB")

# Fill colours passed to scale_fill_manual(), one per team panel.
jColors <- c("#BD3039", "#0C2340")

# Boxplot of AVG against HR, one panel per team, filled by team.
ggplot(Rivals, aes(x = HR, y = AVG, fill = Team)) +
  geom_boxplot() +
  facet_wrap(~Team) +
  scale_fill_manual(values = jColors) +
  labs(
    x = "Average Home Runs Hit in a Season",
    y = "Batting Average",
    title = "Rivaly Between the Boston Red Sox and New York Yankees"
  )

Does Avg # of pitches in an at bat correlate with batting Avg? Does it correlate with HRs?

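## I can't get the R statistic for these yet, so there is no R^2 either: cor() returns NA because NPAB contains NAs (NP was coerced from character above). Passing use = "complete.obs" to cor() would give R.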

# The author reads this plot as showing no correlation between the two
# variables. (A flat fit is consistent with the null hypothesis of no
# correlation; it does not prove it.)
## R_SQUARED
ggplot(dat, aes(x = AVG, y = NPAB)) +
  geom_smooth(method = "lm") +
  geom_point() +
  labs(
    x = "Batting Average",
    y = "Average Number of Pitches During an at Bat",
    title = "Relationship Between Average Number of Pitches in an at Bat to Batting Average"
  )
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

# This one has a slight positive correlation.
# The title previously said "... to Batting Average" (copied from the plot
# above) although x is home runs per at bat; it now names the plotted variable.
## R_SQUARED
ggplot(dat, aes(x = HRAB, y = NPAB)) +
  geom_smooth(method = "lm") +
  geom_point() +
  xlab("Average Homeruns in an at Bat") +
  ylab("Average Number of Pitches During an at Bat") +
  ggtitle("Relationship Between Average Number of Pitches in an at Bat to Home Runs per at Bat")
## Warning: Removed 22 rows containing non-finite values (stat_smooth).

## Warning: Removed 22 rows containing missing values (geom_point).

Distribution of BABIP with Buster Posey Highlighted

# Rows whose Player field contains "Posey,B", and the mean of their BABIP.
Posey <- dat %>%
  filter(grepl("Posey,B", Player))
MeanBABIPposey <- mean(Posey$BABIP)

# BABIP histogram over all rows (default 30 bins) with a red line at Posey's
# mean and a text label placed at (.38, 135).
ggplot(dat, aes(x = BABIP)) +
  geom_histogram(color = "black", fill = "white") +
  geom_vline(xintercept = MeanBABIPposey, color = "Red") +
  annotate("Text", x = .38, y = 135, label = "Buster Posey's Average BABIP")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Which teams hit the most homeruns?

# Attach each team's mean home-run rate (HR per at bat) to every row. HRAB
# values equal to -9999 are replaced with NA and excluded from the mean.
dat7 <- dat %>%
  select(Team, HRAB) %>%
  mutate(HRAB = ifelse(HRAB == -9999, NA, HRAB)) %>%
  group_by(Team) %>%
  mutate(MeanHR = mean(HRAB, na.rm = TRUE))  # dat7 stays grouped by Team

# One row per team, with the mean rounded to 3 digits.
dat8 <- dat7 %>%
  select(-HRAB) %>%
  distinct()
dat8$MeanHR <- round(dat8$MeanHR, digits = 3)
head(dat8, 10)
## # A tibble: 10 x 2
## # Groups:   Team [10]
##    Team  MeanHR
##    <chr>  <dbl>
##  1 LAD    0.044
##  2 CWS    0.033
##  3 MIN    0.034
##  4 NYM    0.032
##  5 CLE    0.035
##  6 WSH    0.039
##  7 STL    0.044
##  8 TEX    0.037
##  9 TB     0.036
## 10 CHC    0.041
# Reorder teams from highest to lowest mean home-run rate.
hr_rank <- order(dat8$MeanHR, decreasing = TRUE)
dat8 <- dat8[hr_rank, ]

head(dat8, 33)
## # A tibble: 33 x 2
## # Groups:   Team [33]
##    Team  MeanHR
##    <chr>  <dbl>
##  1 TOR    0.049
##  2 BAL    0.045
##  3 MIL    0.045
##  4 LAD    0.044
##  5 STL    0.044
##  6 FLA    0.042
##  7 CHC    0.041
##  8 COL    0.04 
##  9 SEA    0.04 
## 10 WSH    0.039
## # … with 23 more rows
library("Hmisc")
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:plotly':
## 
##     subplot
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(corrplot)
## corrplot 0.84 loaded
library(plotly)
# Raw columns that take part in the correlation analysis.
cor_vars <- select(dat, AVG, OBP, SLG, BABIP, OPS, HR, AO, SO, RBI, BB, XBH, NPPA, SB)

# Correlation matrix over rows with no missing values.
cordata <- cor(cor_vars, use = "complete.obs")

# rcorr() expects the raw observations (one row per observation), not a
# correlation matrix. Previously it was given `cordata` after that had already
# been overwritten with the 13 x 13 correlation matrix, so it reported
# correlations of correlations with n = 13. Feed it the raw columns instead
# (rcorr handles missing values by pairwise deletion).
# NOTE(review): the rcorr output printed after this chunk predates this fix
# (it shows n = 13); re-render the document to refresh it.
cordata.rcorr <- rcorr(as.matrix(cor_vars))
cordata.rcorr
##         AVG   OBP   SLG BABIP   OPS    HR    AO    SO   RBI    BB   XBH
## AVG    1.00  0.83  0.67  0.78  0.75 -0.42 -0.74 -0.85 -0.57 -0.62 -0.61
## OBP    0.83  1.00  0.79  0.66  0.90 -0.26 -0.80 -0.73 -0.52 -0.26 -0.57
## SLG    0.67  0.79  1.00  0.33  0.98  0.22 -0.57 -0.49 -0.11 -0.18 -0.17
## BABIP  0.78  0.66  0.33  1.00  0.46 -0.71 -0.88 -0.66 -0.81 -0.68 -0.82
## OPS    0.75  0.90  0.98  0.46  1.00  0.07 -0.67 -0.59 -0.25 -0.21 -0.31
## HR    -0.42 -0.26  0.22 -0.71  0.07  1.00  0.60  0.65  0.92  0.72  0.90
## AO    -0.74 -0.80 -0.57 -0.88 -0.67  0.60  1.00  0.74  0.84  0.64  0.87
## SO    -0.85 -0.73 -0.49 -0.66 -0.59  0.65  0.74  1.00  0.77  0.71  0.80
## RBI   -0.57 -0.52 -0.11 -0.81 -0.25  0.92  0.84  0.77  1.00  0.75  0.99
## BB    -0.62 -0.26 -0.18 -0.68 -0.21  0.72  0.64  0.71  0.75  1.00  0.75
## XBH   -0.61 -0.57 -0.17 -0.82 -0.31  0.90  0.87  0.80  0.99  0.75  1.00
## NPPA  -0.07  0.43  0.27  0.18  0.34 -0.06 -0.48 -0.03 -0.29  0.22 -0.32
## SB    -0.31 -0.53 -0.75 -0.09 -0.71 -0.44  0.26  0.10 -0.19 -0.16 -0.11
##        NPPA    SB
## AVG   -0.07 -0.31
## OBP    0.43 -0.53
## SLG    0.27 -0.75
## BABIP  0.18 -0.09
## OPS    0.34 -0.71
## HR    -0.06 -0.44
## AO    -0.48  0.26
## SO    -0.03  0.10
## RBI   -0.29 -0.19
## BB     0.22 -0.16
## XBH   -0.32 -0.11
## NPPA   1.00 -0.40
## SB    -0.40  1.00
## 
## n= 13 
## 
## 
## P
##       AVG    OBP    SLG    BABIP  OPS    HR     AO     SO     RBI   
## AVG          0.0005 0.0120 0.0016 0.0029 0.1485 0.0039 0.0002 0.0426
## OBP   0.0005        0.0012 0.0132 0.0000 0.3980 0.0009 0.0048 0.0717
## SLG   0.0120 0.0012        0.2727 0.0000 0.4689 0.0436 0.0906 0.7119
## BABIP 0.0016 0.0132 0.2727        0.1180 0.0064 0.0000 0.0145 0.0008
## OPS   0.0029 0.0000 0.0000 0.1180        0.8107 0.0119 0.0336 0.4074
## HR    0.1485 0.3980 0.4689 0.0064 0.8107        0.0289 0.0157 0.0000
## AO    0.0039 0.0009 0.0436 0.0000 0.0119 0.0289        0.0039 0.0003
## SO    0.0002 0.0048 0.0906 0.0145 0.0336 0.0157 0.0039        0.0021
## RBI   0.0426 0.0717 0.7119 0.0008 0.4074 0.0000 0.0003 0.0021       
## BB    0.0237 0.3993 0.5617 0.0101 0.4875 0.0057 0.0195 0.0060 0.0029
## XBH   0.0278 0.0425 0.5675 0.0006 0.2974 0.0000 0.0000 0.0011 0.0000
## NPPA  0.8092 0.1385 0.3713 0.5596 0.2596 0.8516 0.1005 0.9131 0.3401
## SB    0.2979 0.0597 0.0032 0.7747 0.0060 0.1313 0.3936 0.7466 0.5304
##       BB     XBH    NPPA   SB    
## AVG   0.0237 0.0278 0.8092 0.2979
## OBP   0.3993 0.0425 0.1385 0.0597
## SLG   0.5617 0.5675 0.3713 0.0032
## BABIP 0.0101 0.0006 0.5596 0.7747
## OPS   0.4875 0.2974 0.2596 0.0060
## HR    0.0057 0.0000 0.8516 0.1313
## AO    0.0195 0.0000 0.1005 0.3936
## SO    0.0060 0.0011 0.9131 0.7466
## RBI   0.0029 0.0000 0.3401 0.5304
## BB           0.0030 0.4658 0.5974
## XBH   0.0030        0.2921 0.7212
## NPPA  0.4658 0.2921        0.1775
## SB    0.5974 0.7212 0.1775
# Twenty colours interpolated from blue through yellow to red.
palette <- colorRampPalette(c("blue", "yellow", "red"))(20)

# Dot graph of the correlation matrix.
corrplot(cordata, col = palette)

# Heatmap of the same matrix, treated as symmetric.
heatmap(cordata, col = palette, symm = TRUE)

#go back and change color scheme

Choropleth map

# Read the stadium table. Later code uses its ABB, Longitude, Latitude, `ABR.`
# and AVG columns.
Stadiums <- read_excel("StadiumLocations.xlsx")

# View() opens an interactive data viewer, so only call it in an interactive
# session; this keeps the script runnable when knitted or run via Rscript.
if (interactive()) {
  View(Stadiums)
}

# State outline polygons from the maps package, in ggplot-ready form.
states <- map_data("state")
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map

# Base layer: state polygons only; stadium markers are added on top later.
base <- ggplot(states, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group)) +
  labs(title = "MLB Teams", x = " ", y = "_")
base

#Change this to look better, put labels, put colors, 
#In the excel file, add averages for each team, make sure team names are able so we can display them

# One data frame per team: the Stadiums rows whose ABB equals that team's
# abbreviation. Each is used as the `data` of that team's map markers.
LAA <- filter(Stadiums, ABB == "LAA")
SF <- filter(Stadiums, ABB == "SF")
COL <- filter(Stadiums, ABB == "COL")
STL <- filter(Stadiums, ABB == "STL")
ARI <- filter(Stadiums, ABB == "ARI")
LAD <- filter(Stadiums, ABB == "LAD")
NYM <- filter(Stadiums, ABB == "NYM")
PHI <- filter(Stadiums, ABB == "PHI")
DET <- filter(Stadiums, ABB == "DET")
BOS <- filter(Stadiums, ABB == "BOS")
TEX <- filter(Stadiums, ABB == "TEX")
CIN <- filter(Stadiums, ABB == "CIN")
KC <- filter(Stadiums, ABB == "KC")
MIA <- filter(Stadiums, ABB == "MIA")
MIL <- filter(Stadiums, ABB == "MIL")
HOU <- filter(Stadiums, ABB == "HOU")
WSH <- filter(Stadiums, ABB == "WSH")
OAK <- filter(Stadiums, ABB == "OAK")
BAL <- filter(Stadiums, ABB == "BAL")
SD <- filter(Stadiums, ABB == "SD")
PIT <- filter(Stadiums, ABB == "PIT")
CLE <- filter(Stadiums, ABB == "CLE")
TOR <- filter(Stadiums, ABB == "TOR")
SEA <- filter(Stadiums, ABB == "SEA")
MIN <- filter(Stadiums, ABB == "MIN")
TB <- filter(Stadiums, ABB == "TB")
ATL <- filter(Stadiums, ABB == "ATL")
CWS <- filter(Stadiums, ABB == "CWS")
CHC <- filter(Stadiums, ABB == "CHC")
NYY <- filter(Stadiums, ABB == "NYY")

# Add one team's stadium to a plot as a two-tone marker: a size-7 point in the
# outer colour with a size-5 point in the inner colour drawn on top of it.
#   p     - ggplot object to add the layers to
#   team  - data frame with Longitude and Latitude columns for that team
#   outer - colour of the larger (ring) point
#   inner - colour of the smaller (centre) point
# Returns the plot with the two layers appended.
add_team_marker <- function(p, team, outer, inner) {
  p +
    geom_point(aes(x = Longitude, y = Latitude), data = team,
               colour = outer, size = 7) +
    geom_point(aes(x = Longitude, y = Latitude), data = team,
               colour = inner, size = 5)
}

# First batch of teams. Markers are added team by team in the same order as
# before, so overlapping markers stack identically. KC used to be added twice;
# the redundant second pair of layers is dropped.
half1 <- base %>%
  add_team_marker(LAA, "#003263", "#BA0021") %>%
  add_team_marker(SF, "#FD5A1E", "#27251F") %>%
  add_team_marker(COL, "#C4CED4", "#33006F") %>%
  add_team_marker(STL, "#0C2340", "#C41E3A") %>%
  add_team_marker(ARI, "#E3D4AD", "#A71930") %>%
  add_team_marker(LAD, "#EF3E42", "#005A9C") %>%
  add_team_marker(NYM, "#FF5910", "#002D72") %>%
  add_team_marker(PHI, "#002D72", "#E81828") %>%
  add_team_marker(DET, "#FA4616", "#0C2340") %>%
  add_team_marker(BOS, "#0C2340", "#BD3039") %>%
  add_team_marker(TEX, "#C0111F", "#003278") %>%
  add_team_marker(CIN, "white", "#C6011F") %>%
  add_team_marker(KC, "#BD9B60", "#004687") %>%
  add_team_marker(MIA, "#FF6600", "#0077C8")

# Remaining teams, layered on top of half1.
whole <- half1 %>%
  add_team_marker(MIL, "#B6922E", "#0A2351") %>%
  add_team_marker(HOU, "#EB6E1F", "#002D62") %>%
  add_team_marker(WSH, "#14225A", "#AB0003") %>%
  add_team_marker(OAK, "#EFB21E", "#003831") %>%
  add_team_marker(BAL, "#000000", "#DF4601") %>%
  add_team_marker(SD, "#A2AAAD", "#002D62") %>%
  add_team_marker(PIT, "#FDB827", "#27251F") %>%
  add_team_marker(CLE, "#E31937", "#0C2340") %>%
  add_team_marker(TOR, "#1D2D5C", "#134A8E") %>%
  add_team_marker(SEA, "#005C5C", "#0C2C56") %>%
  add_team_marker(MIN, "#D31145", "#002B5C") %>%
  add_team_marker(TB, "#8FBCE6", "#092C5C") %>%
  add_team_marker(ATL, "#13274F", "#CE1141") %>%
  add_team_marker(CWS, "#C4CED4", "#27251F") %>%
  add_team_marker(CHC, "#CC3433", "#0E3386") %>%
  add_team_marker(NYY, "#0C2340", "red")


library(ggrepel)
#change team names to just team, not with city?
#add batting avgs
#change colors and size of text
# Stadiums whose longitude lies strictly between -130 and 0.
labelled_stadiums <- Stadiums %>%
  filter(Longitude < 0 & Longitude > -130)

# Label each stadium with its `ABR.` value (black, above the marker) and its
# AVG value (red, below the marker). The labels are mapped inside aes() so they
# come from the same filtered rows as the coordinates; previously they were
# taken from the unfiltered Stadiums table, which mismatches (or errors) as
# soon as the filter drops a row.
whole +
  geom_label(
    aes(x = Longitude, y = Latitude, label = `ABR.`),
    data = labelled_stadiums,
    color = "black",
    size = 3,
    nudge_y = 1,
    label.padding = unit(.1, "lines")
  ) +
  geom_text(
    aes(x = Longitude, y = Latitude, label = AVG),
    data = labelled_stadiums,
    color = "red",
    size = 2,
    nudge_y = -1.5
  )

#ggplot(states, aes(x=long, y=lat))+geom_polygon(aes(group=group))+geom_point(aes(x=Longitude, y=Latitude), data=Stadiums %>% filter(Longitude <0 & Longitude> -130), size=2, color="red") +geom